
# 1、创建环境
import time

from pyspark.context import SparkContext

# Create the Spark context for this demo, running against the local master.
sc = SparkContext(appName='demo2_map', master='local')

# 2. Read the data: the same text file is loaded twice, giving two RDDs
#    built from identical input.
students_path = "../../data/students.txt"
rdd1 = sc.textFile(students_path)
rdd2 = sc.textFile(students_path)

# union: concatenates the two RDDs; it does not produce a shuffle.
union_rdd = rdd1.union(rdd2)
union_rdd.foreach(print)

# distinct: removes duplicate elements; it does produce a shuffle.
distinct_rdd = union_rdd.distinct()
distinct_rdd.foreach(print)


# Block forever so the driver process stays alive after the jobs finish
# (presumably so the Spark web UI can still be inspected -- confirm).
# Sleeping between iterations avoids the busy-wait of `while True: pass`,
# which would keep one CPU core fully loaded for as long as the script runs.
try:
    while True:
        time.sleep(1)
finally:
    # Reached when the wait is interrupted (e.g. Ctrl-C); release the
    # SparkContext before the interrupt propagates.
    sc.stop()